From 5125480477dc4f115924c5b426eeac45114050d0 Mon Sep 17 00:00:00 2001 From: tsteven4 <13596209+tsteven4@users.noreply.github.com> Date: Wed, 3 May 2023 12:55:48 -0600 Subject: [PATCH] use QRegularExpression for xml generic tag matching. (#1099) * use QRegularExpression for xml generic tag matching. The kml reader now creates tracks from innerBoundaryIs elements in addition to the historic creation from outerBoundaryIs elements. * collapse kml 2.3 track elements * const correctness improvements for xmlgeneric. --- defs.h | 2 - garmin_device_xml.cc | 3 +- kml.h | 29 +++++------- util.cc | 108 ------------------------------------------- xmlgeneric.cc | 30 ++++++------ xmlgeneric.h | 27 +++++++---- 6 files changed, 46 insertions(+), 153 deletions(-) diff --git a/defs.h b/defs.h index 674ead1b7..3d217d559 100644 --- a/defs.h +++ b/defs.h @@ -1016,8 +1016,6 @@ inline int case_ignore_strncmp(const QString& s1, const QString& s2, int n) return s1.left(n).compare(s2.left(n), Qt::CaseInsensitive); } -int str_match(const char* str, const char* match); - [[gnu::format(printf, 2, 3)]] int xasprintf(char** strp, const char* fmt, ...); [[gnu::format(printf, 2, 3)]] int xasprintf(QString* strp, const char* fmt, ...); [[gnu::format(printf, 2, 3)]] int xasprintf(QScopedPointer& strp, const char* fmt, ...); diff --git a/garmin_device_xml.cc b/garmin_device_xml.cc index 2b43a9e94..6c4506256 100644 --- a/garmin_device_xml.cc +++ b/garmin_device_xml.cc @@ -113,7 +113,7 @@ void dir_s(xg_string args, const QXmlStreamAttributes*) path = nullptr; } -static xg_tag_mapping gdx_map[] = { +static QList gdx_map = { { device_s, cb_cdata, "/Device/Model/Description" }, { id_s, cb_cdata, "/Device/Id" }, { path_s, cb_cdata, "/Device/MassStorageMode/DataType/File/Location/Path" }, @@ -121,7 +121,6 @@ static xg_tag_mapping gdx_map[] = { { ext_s, cb_cdata, "/Device/MassStorageMode/DataType/File/Location/FileExtension" }, { base_s, cb_cdata, "/Device/MassStorageMode/DataType/File/Location/BaseName" }, { 
dir_s, cb_cdata, "/Device/MassStorageMode/DataType/File/TransferDirection" }, - { nullptr, (xg_cb_type) 0, nullptr } }; const gdx_info* diff --git a/kml.h b/kml.h index 81875767f..f11e2ebbd 100644 --- a/kml.h +++ b/kml.h @@ -341,25 +341,18 @@ private: {&KmlFormat::wpt_time, cb_cdata, "/Placemark/TimeStamp/when"}, // Alias for above used in KML 2.0 {&KmlFormat::wpt_time, cb_cdata, "/Placemark/TimeInstant/timePosition"}, - {&KmlFormat::wpt_coord, cb_cdata, "/Placemark/Point/coordinates"}, - {&KmlFormat::wpt_coord, cb_cdata, "/Placemark/MultiGeometry/Point/coordinates"}, + {&KmlFormat::wpt_coord, cb_cdata, "/Placemark/(.+/)?Point/coordinates"}, {&KmlFormat::wpt_icon, cb_cdata, "/Placemark/Style/Icon/href"}, - {&KmlFormat::trk_coord, cb_cdata, "/Placemark/MultiGeometry/LineString/coordinates"}, - {&KmlFormat::trk_coord, cb_cdata, "/Placemark/GeometryCollection/LineString/coordinates"}, - {&KmlFormat::trk_coord, cb_cdata, "/Placemark/Polygon/outerBoundaryIs/LinearRing/coordinates"}, - {&KmlFormat::trk_coord, cb_cdata, "/Placemark/LineString/coordinates"}, - {&KmlFormat::gx_trk_s, cb_start, "/Placemark/*gx:Track"}, - {&KmlFormat::gx_trk_e, cb_end, "/Placemark/*gx:Track"}, - {&KmlFormat::gx_trk_when, cb_cdata, "/Placemark/*gx:Track/when"}, - {&KmlFormat::gx_trk_coord, cb_cdata, "/Placemark/*gx:Track/gx:coord"}, - {&KmlFormat::gx_trk_s, cb_start, "/Placemark/Track"}, // KML 2.3 - {&KmlFormat::gx_trk_e, cb_end, "/Placemark/Track"}, // KML 2.3 - {&KmlFormat::gx_trk_when, cb_cdata, "/Placemark/Track/when"}, // KML 2.3 - {&KmlFormat::gx_trk_coord, cb_cdata, "/Placemark/Track/coord"}, // KML 2.3 - {&KmlFormat::gx_trk_s, cb_start, "/Placemark/MultiTrack/Track"}, // KML 2.3 - {&KmlFormat::gx_trk_e, cb_end, "/Placemark/MultiTrack/Track"}, // KML 2.3 - {&KmlFormat::gx_trk_when, cb_cdata, "/Placemark/MultiTrack/Track/when"}, // KML 2.3 - {&KmlFormat::gx_trk_coord, cb_cdata, "/Placemark/MultiTrack/Track/coord"} // KML 2.3 + {&KmlFormat::trk_coord, cb_cdata, 
"/Placemark/(.+/)?LineString/coordinates"}, + {&KmlFormat::trk_coord, cb_cdata, "/Placemark/(.+)/?LinearRing/coordinates"}, + {&KmlFormat::gx_trk_s, cb_start, "/Placemark/(.+/)?gx:Track"}, + {&KmlFormat::gx_trk_e, cb_end, "/Placemark/(.+/)?gx:Track"}, + {&KmlFormat::gx_trk_when, cb_cdata, "/Placemark/(.+/)?gx:Track/when"}, + {&KmlFormat::gx_trk_coord, cb_cdata, "/Placemark/(.+/)?gx:Track/gx:coord"}, + {&KmlFormat::gx_trk_s, cb_start, "/Placemark/(.+/)?Track"}, // KML 2.3 + {&KmlFormat::gx_trk_e, cb_end, "/Placemark/(.+/)?Track"}, // KML 2.3 + {&KmlFormat::gx_trk_when, cb_cdata, "/Placemark/(.+/)?Track/when"}, // KML 2.3 + {&KmlFormat::gx_trk_coord, cb_cdata, "/Placemark/(.+/)?Track/coord"}, // KML 2.3 }; // The TimeSpan/begin and TimeSpan/end DateTimes: diff --git a/util.cc b/util.cc index 31ab466e0..598891d76 100644 --- a/util.cc +++ b/util.cc @@ -318,114 +318,6 @@ xvasprintf(char** strp, const char* fmt, va_list ap) return outsize; } -/* - * compare str with match - * match may contain wildcards "*" and "?" - * - * examples: - * str_match("ABCDE", "*BC*") -> 1 - * str_match("ABCDE", "A*C*E") -> 1 - * str_match("?ABCDE", "\\?A*") -> 1 - * str_match("", "*A") -> 0 - */ - -int -str_match(const char* str, const char* match) -{ - const char* s = str; - const char* m = match; - - while (*m || *s) { - switch (*m) { - - case '\0': - /* there is something left in s, FAIL */ - return 0; - - case '*': - /* skip all wildcards */ - while ((*m == '*') || (*m == '?')) { - m++; - } - if (*m == '\0') { - return 1; - } - - if (*m == '\\') { /* ? escaped ? */ - m++; - if (*m == '\0') { - return 0; - } - } - - do { - while (*s && (*s != *m)) { - s++; - } - if (*s == '\0') { - return 0; - } - - const char* sx = s + 1; - const char* mx = m + 1; - - while (*sx) { - if (*mx == '\\') { /* ? escaped ? 
*/ - mx++; - if (*mx == '\0') { - return 0; - } - - } - if (*sx == *mx) { - sx++; - mx++; - } else { - break; - } - } - if (*mx == '\0') { /* end of match */ - if (*sx == '\0') { - return 1; - } - s++; - } else if ((*mx == '?') || (*mx == '*')) { - s = sx; - m = mx; - break; - } else { - s++; - } - } while (*s); - break; - - case '?': - if (*s == '\0') { - return 0; /* no character left */ - } - m++; - s++; - break; - - case '\\': - m++; - if (*m == '\0') { - return 0; /* incomplete escape sequence */ - } - /* pass-through next character */ - [[fallthrough]]; - - default: - if (*m != *s) { - return 0; - } - m++; - s++; - } - } - return ((*s == '\0') && (*m == '\0')); -} - void printposn(const double c, bool is_lat) { diff --git a/xmlgeneric.cc b/xmlgeneric.cc index 841522b89..0d5a70c24 100644 --- a/xmlgeneric.cc +++ b/xmlgeneric.cc @@ -41,7 +41,7 @@ enum xg_shortcut { xg_shortcut_ignore }; -static QList* xg_tag_tbl; +static const QList* xg_tag_tbl; static bool dynamic_tag_tbl; static QHash* xg_shortcut_taglist; @@ -66,11 +66,12 @@ static QTextCodec* codec = utf8_codec; // Qt has no vanilla ASCII encoding =( static XgCallbackBase* xml_tbl_lookup(const QString& tag, xg_cb_type cb_type) { - const QByteArray key = tag.toUtf8(); - const char* keyptr = key.constData(); - for (const auto& tm : qAsConst(*xg_tag_tbl)) { - if ((cb_type == tm.cb_type) && str_match(keyptr, tm.tag_name)) { - return tm.tag_cb; + for (const auto& tm : *xg_tag_tbl) { + if (cb_type == tm.cb_type) { + QRegularExpressionMatch match = tm.tag_re.match(tag); + if (match.hasMatch()) { + return tm.tag_cb; + } } } return nullptr; @@ -104,7 +105,7 @@ xml_common_init(const QString& fname, const char* encoding, } void -xml_init(const QString& fname, QList* tbl, const char* encoding, +xml_init(const QString& fname, const QList* tbl, const char* encoding, const char* const* ignorelist, const char* const* skiplist, bool dynamic_tbl) { xg_tag_tbl = tbl; @@ -114,15 +115,18 @@ xml_init(const QString& fname, QList* 
tbl, const char* encodin } void -xml_init(const QString& fname, xg_tag_mapping* tbl, const char* encoding, +xml_init(const QString& fname, const QList& tbl, const char* encoding, const char* const* ignorelist, const char* const* skiplist) { - xg_tag_tbl = new QList; + auto* tag_tbl = new QList; dynamic_tag_tbl = true; - for (xg_tag_mapping* tm = tbl; tm->tag_cb != nullptr; ++tm) { - auto* cb = new XgFunctionPtrCallback(tm->tag_cb); - xg_tag_tbl->append({cb, tm->cb_type, tm->tag_name}); + for (const auto& tm : tbl) { + auto* cb = new XgFunctionPtrCallback(tm.tag_cb); + QRegularExpression re(QRegularExpression::anchoredPattern(tm.tag_pattern)); + assert(re.isValid()); + tag_tbl->append({cb, tm.cb_type, re}); } + xg_tag_tbl = tag_tbl; xml_common_init(fname, encoding, ignorelist, skiplist); } @@ -131,7 +135,7 @@ void xml_deinit() { if (dynamic_tag_tbl) { - for (const auto& tm : qAsConst(*xg_tag_tbl)) { + for (const auto& tm : *xg_tag_tbl) { delete tm.tag_cb; } delete xg_tag_tbl; diff --git a/xmlgeneric.h b/xmlgeneric.h index 5c7189fb5..101efa3d3 100644 --- a/xmlgeneric.h +++ b/xmlgeneric.h @@ -22,8 +22,13 @@ #ifndef XMLGENERIC_H_INCLUDED_ #define XMLGENERIC_H_INCLUDED_ -#include // for QString -#include // for QXmlStreamAttributes +#include // for assert + +#include // for QList +#include // for QRegularExpression +#include // for QString +#include // for QXmlStreamAttributes + // Maybe the XmlGeneric string callback really shouldn't have a type // of its own; this was a crutch during the move from char* to QString. @@ -87,7 +92,7 @@ private: struct xg_tag_map_entry { XgCallbackBase* tag_cb; xg_cb_type cb_type; - const char* tag_name; + QRegularExpression tag_re; }; // Table generation from an array containing function pointers. 
@@ -98,7 +103,7 @@ using xg_callback = void (xg_string, const QXmlStreamAttributes*); struct xg_tag_mapping { xg_callback* tag_cb; xg_cb_type cb_type; - const char* tag_name; + const char* tag_pattern; }; // Table generation from a list containing member function pointers. @@ -108,16 +113,18 @@ struct xg_functor_map_entry { using XgCb = void (MyFormat::*)(xg_string, const QXmlStreamAttributes*); XgCb tag_cb; xg_cb_type cb_type; - const char* tag_name; + const char* tag_pattern; }; template QList* build_xg_tag_map(MyFormat* instance, const QList& map) { auto* tag_tbl = new QList; - for (const auto& entry : qAsConst(map)) { + for (const auto& entry : map) { auto* tag_cb = new XgFunctor(instance, entry.tag_cb); - tag_tbl->append({tag_cb, entry.cb_type, entry.tag_name}); + QRegularExpression re(QRegularExpression::anchoredPattern(entry.tag_pattern)); + assert(re.isValid()); + tag_tbl->append({tag_cb, entry.cb_type, re}); } return tag_tbl; } @@ -135,7 +142,7 @@ QList* build_xg_tag_map(MyFormat* instance, const QList* build_xg_tag_map(MyFormat* instance, const QList* tbl, const char* encoding, +void xml_init(const QString& fname, const QList* tbl, const char* encoding, const char* const* ignorelist = nullptr, const char* const* skiplist = nullptr, bool dynamic_tbl = false); -void xml_init(const QString& fname, xg_tag_mapping* tbl,const char* encoding, +void xml_init(const QString& fname, const QList& tbl,const char* encoding, const char* const* ignorelist = nullptr, const char* const* skiplist = nullptr); void xml_read(); -- 2.30.2